{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import joblib\n",
    "from joblib import Parallel,delayed\n",
    "from sklearn.preprocessing import scale\n",
    "import os\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "def concat_sensors(train_no, csv_nos):\n",
    "    '''\n",
    "    描述：\n",
    "        合并所有的sensor文件\n",
    "    '''\n",
    "    input_dir = './sensors_tsfresh_comprehensive/%s/'%train_no\n",
    "    def basis_func(idx):\n",
    "        sensor = joblib.load(input_dir + '%d.lz4'%idx)\n",
    "        return sensor\n",
    "    sensors = Parallel(n_jobs=24, verbose=10)(delayed(basis_func)(i) for i in range(1,csv_nos+1))\n",
    "    res = pd.concat(sensors, axis=0)\n",
    "    res.reset_index(drop=True, inplace=True)\n",
    "    return res\n",
    "\n",
    "def concat_sensors_plc(train_no, csv_nos):\n",
    "    '''\n",
    "    描述：\n",
    "        拼接plc和sensors\n",
    "    '''\n",
    "    input_dir = './test_plc_CL/'\n",
    "    # 拼接\n",
    "    plc = joblib.load(input_dir + 'test_%s.lz4'%train_no)\n",
    "    sensors = concat_sensors(train_no, csv_nos)\n",
    "    \n",
    "    tmp = pd.concat([plc[['csv_no', 'CL', 'CLI', 'spindle_load']],sensors], axis=1)\n",
    "    return tmp \n",
    "    \n",
    "\n",
    "def filter_inf(df):\n",
    "    '''\n",
    "    描述：\n",
    "        过滤有inf值的样本\n",
    "    '''\n",
    "    inf_filter = np.sum(df.values == np.inf, axis=1)\n",
    "    df = df[inf_filter==0]\n",
    "    df.reset_index(drop=True, inplace=True)\n",
    "    print('有%d行inf值'%sum(inf_filter))\n",
    "    return df\n",
    "\n",
    "def filter_nan(df):\n",
    "    '''\n",
    "    描述：\n",
    "        过滤有nan值的样本\n",
    "    注意：\n",
    "        np.nan == np.nan  False\n",
    "    '''\n",
    "    nan_filter = df.isnull().sum(axis=1)>0\n",
    "    df = df[nan_filter==0]\n",
    "    df.reset_index(drop=True, inplace=True)\n",
    "    print('有%d行nan值'%sum(nan_filter))\n",
    "    return df\n",
    "\n",
    "\n",
    "def write_file(train_no, csv_nos):\n",
    "    '''\n",
    "    描述：\n",
    "        拼接文件，去除inf值和nan值，并输出文件\n",
    "    '''\n",
    "    if not os.path.exists('./concats'):\n",
    "        os.mkdir('./concats')\n",
    "    output_dir = './concats/'\n",
    "    dev = concat_sensors_plc(train_no, csv_nos)\n",
    "    dev = filter_inf(dev)\n",
    "    dev = filter_nan(dev)\n",
    "    joblib.dump(dev, output_dir + 'concat_%s.lz4'%train_no, compress='lz4')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=24)]: Using backend LokyBackend with 24 concurrent workers.\n",
      "[Parallel(n_jobs=24)]: Batch computation too fast (0.0093s.) Setting batch_size=42.\n",
      "[Parallel(n_jobs=24)]: Done   3 out of  10 | elapsed:    0.0s remaining:    0.0s\n",
      "[Parallel(n_jobs=24)]: Done   5 out of  10 | elapsed:    0.0s remaining:    0.0s\n",
      "[Parallel(n_jobs=24)]: Done   7 out of  10 | elapsed:    0.0s remaining:    0.0s\n",
      "[Parallel(n_jobs=24)]: Done  10 out of  10 | elapsed:    0.0s finished\n",
      "[Parallel(n_jobs=24)]: Using backend LokyBackend with 24 concurrent workers.\n",
      "[Parallel(n_jobs=24)]: Batch computation too fast (0.0093s.) Setting batch_size=42.\n",
      "[Parallel(n_jobs=24)]: Done   3 out of  10 | elapsed:    0.0s remaining:    0.0s\n",
      "[Parallel(n_jobs=24)]: Done   5 out of  10 | elapsed:    0.0s remaining:    0.0s\n",
      "[Parallel(n_jobs=24)]: Done   7 out of  10 | elapsed:    0.0s remaining:    0.0s\n",
      "[Parallel(n_jobs=24)]: Done  10 out of  10 | elapsed:    0.0s finished\n",
      "[Parallel(n_jobs=24)]: Using backend LokyBackend with 24 concurrent workers.\n",
      "[Parallel(n_jobs=24)]: Batch computation too fast (0.0092s.) Setting batch_size=42.\n",
      "[Parallel(n_jobs=24)]: Done   3 out of  10 | elapsed:    0.0s remaining:    0.0s\n",
      "[Parallel(n_jobs=24)]: Done   5 out of  10 | elapsed:    0.0s remaining:    0.0s\n",
      "[Parallel(n_jobs=24)]: Done   7 out of  10 | elapsed:    0.0s remaining:    0.0s\n",
      "[Parallel(n_jobs=24)]: Done  10 out of  10 | elapsed:    0.0s finished\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "有0行inf值\n",
      "有0行nan值\n",
      "有0行inf值\n",
      "有0行nan值\n",
      "有0行inf值\n",
      "有0行nan值\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=24)]: Using backend LokyBackend with 24 concurrent workers.\n",
      "[Parallel(n_jobs=24)]: Batch computation too fast (0.0158s.) Setting batch_size=24.\n",
      "[Parallel(n_jobs=24)]: Done   3 out of  10 | elapsed:    0.0s remaining:    0.0s\n",
      "[Parallel(n_jobs=24)]: Done   5 out of  10 | elapsed:    0.0s remaining:    0.0s\n",
      "[Parallel(n_jobs=24)]: Done   7 out of  10 | elapsed:    0.0s remaining:    0.0s\n",
      "[Parallel(n_jobs=24)]: Done  10 out of  10 | elapsed:    0.0s finished\n",
      "[Parallel(n_jobs=24)]: Using backend LokyBackend with 24 concurrent workers.\n",
      "[Parallel(n_jobs=24)]: Batch computation too fast (0.0098s.) Setting batch_size=40.\n",
      "[Parallel(n_jobs=24)]: Done   3 out of  10 | elapsed:    0.0s remaining:    0.0s\n",
      "[Parallel(n_jobs=24)]: Done   5 out of  10 | elapsed:    0.0s remaining:    0.0s\n",
      "[Parallel(n_jobs=24)]: Done   7 out of  10 | elapsed:    0.0s remaining:    0.0s\n",
      "[Parallel(n_jobs=24)]: Done  10 out of  10 | elapsed:    0.0s finished\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "有0行inf值\n",
      "有0行nan值\n",
      "有0行inf值\n",
      "有0行nan值\n"
     ]
    }
   ],
   "source": [
    "write_file('01', 10)\n",
    "write_file('02', 10)\n",
    "write_file('03', 10)\n",
    "write_file('04', 10)\n",
    "write_file('05', 10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import joblib\n",
    "# test = joblib.load('./concats/concat_01.lz4')\n",
    "# test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# plc_01 = joblib.load('./plc_features/test_01.lz4')\n",
    "# plc_01.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
